#!/usr/bin/env python
# coding=utf-8
# __author__ = 'Yunchao Ling'

def extractEmail(line):
    """Return the first e-mail-like substring found in *line*.

    The address is recognised as:

    * a local part made of runs of word characters (``\\w``) joined by a
      single separator taken from space, ``+``, ``,``, ``-`` or ``.``;
    * an ``@`` sign;
    * a domain made of a run of word characters followed by one or more
      ``.label`` groups (so at least one dot is required; hyphens in the
      domain are not supported).

    Args:
        line: The text to search. The pattern is anchored with ``^``/``$``
            and ``.`` does not match newlines, so it is meant to be used on
            one line at a time.

    Returns:
        The matched address as a string, or ``""`` when nothing matches.
    """
    import re

    # Raw string so the backslash escapes reach the regex engine untouched.
    #
    # The separator inside the local part is mandatory (not optional): with
    # an optional separator the sub-pattern degenerates into ``\w+(\w+)*``,
    # which backtracks exponentially on long words that are not followed by
    # "@".  Both forms accept exactly the same strings, so results are
    # unchanged.
    #
    # The separator class spells out space, "+", ",", "-" and "." explicitly;
    # the comma is kept because the previous ``+-.`` range covered it.
    pattern = r"^.*?(\w+(?:[ +,\-.]\w+)*@\w+(?:\.\w+)+).*?$"

    match = re.match(pattern, line)
    if match is None:
        return ""
    return match.group(1)


if __name__ == "__main__":
    # Scan the sample affiliation file and print the address extracted from
    # every line that yields one.
    # The context manager closes the file even if processing raises.
    with open("D:/data/Affiliation_sample.txt", "r") as infile:
        for line in infile:
            line = line.rstrip()
            # Cheap pre-filter: only lines containing "@" go through the regex.
            if "@" not in line:
                continue
            email = extractEmail(line)
            if email:
                # The parenthesised single-argument form is valid on both
                # Python 2 and Python 3.
                print(email)









